{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 金融 证券 图谱搭建"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 导入命令"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-28T05:48:19.517559Z",
     "start_time": "2020-01-28T05:48:19.514553Z"
    }
   },
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-28T05:54:47.570890Z",
     "start_time": "2020-01-28T05:54:47.564920Z"
    }
   },
   "outputs": [],
   "source": [
    "# Split the exported files into node files and relationship files by name prefix\n",
    "file_ls = os.listdir('../data/output')\n",
    "node_ls = list(filter(lambda name: name.startswith('node'), file_ls))\n",
    "relationship_ls = list(filter(lambda name: name.startswith('rel'), file_ls))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['import.report',\n",
       " 'node_city-header.csv',\n",
       " 'node_city.csv',\n",
       " 'node_companies-header.csv',\n",
       " 'node_companies.csv',\n",
       " 'node_funds-header.csv',\n",
       " 'node_funds.csv',\n",
       " 'node_industries-header.csv',\n",
       " 'node_industries.csv',\n",
       " 'node_managers-header.csv',\n",
       " 'node_managers.csv',\n",
       " 'node_province-header.csv',\n",
       " 'node_province.csv',\n",
       " 'rel_city_in_province-header.csv',\n",
       " 'rel_city_in_province.csv',\n",
       " 'rel_company_in_city-header.csv',\n",
       " 'rel_company_in_city.csv',\n",
       " 'rel_fund_has_custodian-header.csv',\n",
       " 'rel_fund_has_custodian.csv',\n",
       " 'rel_fund_has_management-header.csv',\n",
       " 'rel_fund_has_management.csv',\n",
       " 'rel_fund_listed_company_portfolio-header.csv',\n",
       " 'rel_fund_listed_company_portfolio.csv',\n",
       " 'rel_listed_company_has_manager-header.csv',\n",
       " 'rel_listed_company_has_manager.csv',\n",
       " 'rel_share_in_industry-header.csv',\n",
       " 'rel_share_in_industry.csv']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-28T06:11:51.962998Z",
     "start_time": "2020-01-28T06:11:51.955995Z"
    }
   },
   "outputs": [],
   "source": [
    "def generate_scripts(path, database='graph.db'):\n",
    "    \"\"\"Print a `neo4j-admin import` command for the CSV files found in `path`.\n",
    "\n",
    "    Files whose names start with 'node' are emitted as `--nodes` arguments and\n",
    "    files whose names start with 'rel' as `--relationships` arguments. Each\n",
    "    `<name>-header.csv` file is paired with its `<name>.csv` data file by name;\n",
    "    a header without a matching data file is skipped.\n",
    "\n",
    "    Only file names (not full paths) are printed, so the generated command\n",
    "    refers to the files relative to `path`.\n",
    "\n",
    "    Args:\n",
    "        path: Directory that holds the header/data CSV files.\n",
    "        database: Value placed in the `--database` option.\n",
    "\n",
    "    Returns:\n",
    "        None. The command is written to stdout, one argument per line.\n",
    "    \"\"\"\n",
    "    header_suffix = '-header.csv'\n",
    "    file_names = set(os.listdir(path))\n",
    "\n",
    "    def paired_files(prefix):\n",
    "        # os.listdir() returns names in arbitrary order, so pair header and\n",
    "        # data files by name and iterate in sorted order for stable output.\n",
    "        pairs = []\n",
    "        for header in sorted(file_names):\n",
    "            if not (header.startswith(prefix) and header.endswith(header_suffix)):\n",
    "                continue\n",
    "            data = header[:-len(header_suffix)] + '.csv'\n",
    "            if data in file_names:\n",
    "                pairs.append((header, data))\n",
    "        return pairs\n",
    "\n",
    "    print(\"neo4j-admin import --database={}  \\\\\".format(database))\n",
    "    for header, data in paired_files('node'):\n",
    "        print(\"--nodes {},{}  \\\\\".format(header, data))\n",
    "\n",
    "    for header, data in paired_files('rel'):\n",
    "        print(\"--relationships {},{}  \\\\\".format(header, data))\n",
    "\n",
    "    print(\"--ignore-duplicate-nodes=true  \\\\\")\n",
    "    print(\"--ignore-missing-nodes=true\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-28T06:11:52.416678Z",
     "start_time": "2020-01-28T06:11:52.410676Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "neo4j-admin import --database=finance_graph.db  \\\n",
      "--nodes node_city-header.csv,node_city.csv  \\\n",
      "--nodes node_companies-header.csv,node_companies.csv  \\\n",
      "--nodes node_funds-header.csv,node_funds.csv  \\\n",
      "--nodes node_industries-header.csv,node_industries.csv  \\\n",
      "--nodes node_managers-header.csv,node_managers.csv  \\\n",
      "--nodes node_province-header.csv,node_province.csv  \\\n",
      "--relationships rel_city_in_province-header.csv,rel_city_in_province.csv  \\\n",
      "--relationships rel_company_in_city-header.csv,rel_company_in_city.csv  \\\n",
      "--relationships rel_fund_has_custodian-header.csv,rel_fund_has_custodian.csv  \\\n",
      "--relationships rel_fund_has_management-header.csv,rel_fund_has_management.csv  \\\n",
      "--relationships rel_fund_listed_company_portfolio-header.csv,rel_fund_listed_company_portfolio.csv  \\\n",
      "--relationships rel_listed_company_has_manager-header.csv,rel_listed_company_has_manager.csv  \\\n",
      "--relationships rel_share_in_industry-header.csv,rel_share_in_industry.csv  \\\n",
      "--ignore-duplicate-nodes=true  \\\n",
      "--ignore-missing-nodes=true\n"
     ]
    }
   ],
   "source": [
    "generate_scripts(path='../data/output',database='finance_graph.db')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/src/notebooks/scr'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pwd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`neo4j console` 默认启动的是 `graph.db`，当你有多个图数据时，可以采用软链接的方式\n",
    "\n",
    ">ln -s finance_graph.db graph.db"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
